##############################
# STATE REACH AND DEVELOPMENT // PSRM // Replication
#
# Data Validation with State Capacity Index from Afrobarometer
#
##############################


# DATA #######################

# Load Afrobarometer enumeration area data
ea.data <- readRDS(file.path( "data", "data_enumarea_afrobaro.rds"))

# SETUP ######################

# Main outcomes: Index and constitutive parts
ea.dep.var <- c( Index = "ea_stateidx",
                 Electricity = "ea_electr", Water = "ea_water", Sewage = "ea_sewage", 
                 `Mobile Net.`= "ea_cellph", `Post Office`= "ea_post", `Tar road` = "ea_tarroad",
                 School = "ea_school",Clinic = "ea_clinic"  ,  `Police Stat.`= "ea_police",
                 `Police Pres.` = "ea_police_presence", `Military Pres.` = "ea_military_presence")


# PLOT: CORRELATION EA_STATEIDX -- TRAVEL TIMES ###

## Locals
bin.num <- 40 ## Number of bins in binned scatter plot

## Demean stateidx by country
dm.df <- lfe::demeanlist(ea.data[,c(ea.dep.var, "regcap", "natcap" )],
                         lapply(ea.data[, c("cow.round"), drop = F], as.factor))
colnames(dm.df) <- paste0(colnames(dm.df), ".dm")

## Prepare plot
plot.df <- do.call(rbind, lapply(ea.dep.var, function(v){
  do.call(rbind, lapply(c("regcap", "natcap"), function(dv){
    ### All data
    plot.df <- cbind(dm.df, dv = ea.data[,dv], v = dm.df[, paste0(v, ".dm")])
    plot.df <- na.omit(plot.df[, c("dv", "v")])
    
    ### Binned
    plot.df$dv.bins <- cut(plot.df[,"dv"], breaks = c(-1,unique(quantile(plot.df[,"dv"], probs = seq(0, 1, 1/bin.num)))))
    bin.df <- aggregate.data.frame(list(Observations = rep(1, nrow(plot.df)),
                                        dv = plot.df$dv,
                                        v = plot.df[,"v"]),
                                   plot.df[,c("dv.bins"), drop = F],
                                   FUN = sum)
    bin.df$v <- bin.df$v / bin.df$Observations
    bin.df$dv <- bin.df$dv / bin.df$Observations
    bin.df$type <- dv
    bin.df$variable <- v
    bin.df
  }))
}))  

## Plot main paper
p <- ggplot(plot.df[plot.df$variable == "ea_stateidx",], aes(x = log(1+dv), y = v, weight = Observations)) + 
  geom_point( aes(size = Observations), alpha = .5) +
  geom_smooth(method = "loess", lty = 1) +
  geom_smooth(method = "lm", lty = 2, se = F) +
  scale_size_area() +
  theme_minimal() + 
  ylab("State capacity index") + xlab(NULL) +
  facet_wrap(~ type, nrow = 1, strip.position = "bottom", 
             labeller = as_labeller(c(natcap = "Time to national capital (hours)", 
                                      regcap = "Time to regional capital (hours)") )) +
  scale_x_continuous(breaks = log(1 + c(0, 1, 2.5, 5, 10, 20)), 
                     labels = c(0, 1, 2.5, 5, 10, 20)) +
  theme(strip.background = element_blank(),
        strip.placement = "outside",
        panel.grid.minor.x = element_blank(),
        panel.spacing = unit(1, "lines"))

### Save
png(file.path(fig.path, "fig2_ea_afrobaro_stateidx_main.png"), width = 7, height = 3, res = 400, unit = "in")
print(p)
dev.off()


## Plot appendix with all variables

### Variable laps
var.labs <- names(ea.dep.var)
names(var.labs) <- ea.dep.var

### Sort
plot.df$variable <- factor(plot.df$variable, levels = unique(plot.df$variable), ordered = T)

### Plot
p <- ggplot(plot.df, aes(x = log(1+dv), y = v, weight = Observations)) + 
  geom_point( aes(size = Observations), alpha = .5) +
  geom_smooth(method = "loess", lty = 1) +
  geom_smooth(method = "lm", lty = 2, se = F) +
  scale_size_area() +
  theme_minimal() + 
  facet_grid(variable ~ type, switch = "both",
             labeller = as_labeller(c(natcap = "Time to national capital (log)", 
                                      regcap = "Time to regional capital (log)", var.labs) ),
             scales = "free_y") +
  ylab(NULL) +xlab(NULL) +
  scale_x_continuous(breaks = log(1 + c(0, 1, 2.5, 5, 10, 20)), 
                     labels = c(0, 1, 2.5, 5, 10, 20)) +
  theme(strip.background = element_blank(),
        strip.placement = "outside",
        panel.grid.minor.x = element_blank(),
        panel.spacing = unit(1, "lines"))


### Save
png(file.path(fig.path, "figa5_ea_afrobaro_stateidx_appx.png"), width = 7, height = 10, res = 400, unit = "in")
print(p)
dev.off()


## EA Level Regression: GeoDesic Distance vs. Travel Times ##
stub <- "taba1_crosssec_ea_validation"

# Specifications
specs <- list(c("log(natcap.geodesic)"), 
              c( "log(1+natcap)"), 
              c("log(natcap.geodesic)", "log(1+natcap)"), 
              c("log(regcap.geodesic)"), 
              c("log(1+regcap)"), 
              c("log(regcap.geodesic)", "log(1+regcap)"))

# Fixed effects per country-survey
fe.global <- "survey.id"

# Clustering of standard errors
cluster.global <- "pts.id + cow.year"

# Keep consistent data set
this.ea.data <- ea.data[!is.na(ea.data$natcap.geodesic) & !is.na(ea.data$natcap) &
                          !is.na(ea.data$regcap.geodesic) & !is.na(ea.data$regcap),]

### Estimtate
model.ls <- lapply(specs, function(s){
  print(s)
  form <- make_form(dv = "ea_stateidx", expl = c(s), 
                    fe = fe.global, 
                    iv = "0",
                    se = cluster.global )
  m <- felm(form,  data = this.ea.data, keepX = F, keepCX = F)
  print(summary(m))
  m
})

### Table
# ... prepare
add.lines <- list(latex.addline("Unit:", rep("EA", 6)),
                  latex.addline("Survey FE:", rep("yes", length(model.ls))),
                  latex.mean.dv(model.ls),latex.mean.sd(model.ls))

# ... save main
fileConn <- file(file.path(tab.path, paste0(stub,".tex")))
writeLines(stargazer(model.ls,
                     title="Logged distances to national and regional capitals correlate with state capacity index",
                     multicolumn = F,
                     column.labels = "State capacity index",
                     column.separate = c(6), 
                     column.sep.width ="-10pt",
                     dep.var.caption = "",dep.var.labels = rep("", length(model.ls)),
                     covariate.labels = c("Nat. capital: geodesic","Nat. capital: time","Reg. capital: geodesic","Reg. capital: time"),
                     font.size = "scriptsize",
                     notes.align = "c", label=stub, align =T,
                     add.lines = add.lines,digits = 3, intercept.top = T,intercept.bottom = F,
                     omit.stat = c("rsq","res.dev","ser")), 
           fileConn)
close(fileConn)

# DROP DATA 
# to save some space.. 
rm(ea.data, this.ea.data)
